## END entropy balancing
## #############################################################
## coarsened exact matching
estimator <- c(estimator, "CEM")
cem.out <- cem(treatment="treated", data=mydata.all,
drop=c("sname", "fname", "labour", "tory", "treated",
"lnrealgross", "pscores", "w"))
att.out <- att(cem.out, formula = lnrealgross ~ treated, data=mydata.all)
s <- summary(att.out)
estimate <- c(estimate, s[2,1])
SE <- c(SE, s[2,2])
mydata.all$w <- cem.out$w
w0 <- mydata.all$w[mydata.all$treated==0]
w1 <- mydata.all$w[mydata.all$treated==1]
n1 <- sum(mydata.all$treated)
n <- nrow(mydata.all)
n0 <- n - n1
n.eff.0 <- (sum(w0)^2) / sum(w0^2)
n.eff.1 <- (sum(w1)^2) / sum(w1^2)
n.eff <- (n.eff.1 * n.eff.0) / (n.eff.1 + n.eff.0)
n.eff.max <- (n1 * n0) / (n1 + n0)
Eff.n <- c(Eff.n, n.eff)
Eff.n.ratio <- c(Eff.n.ratio, n.eff/n.eff.max)
DFBETA.out <- DFBETA.weighted(mydata.all$lnrealgross, mydata.all$treated,
mydata.all$w)
DFBETA.min <- c(DFBETA.min, min(DFBETA.out))
DFBETA.max <- c(DFBETA.max, max(DFBETA.out))
min.ind <- which.min(DFBETA.out)
max.ind <- which.max(DFBETA.out)
DFBETA.min.who <- c(DFBETA.min.who, paste(mydata.all[min.ind, "fname"],
mydata.all[min.ind, "sname"]))
DFBETA.max.who <- c(DFBETA.max.who, paste(mydata.all[max.ind, "fname"],
mydata.all[max.ind, "sname"]))
extrap.treat <- c(extrap.treat, NA)
extrap.control <- c(extrap.control, NA)
mydata.CEM.att <- mydata.all
mydata.CEM.att$DFBETA.out <- DFBETA.out
## END coarsened exact matching
## #############################################################
### Table 2 (top panel) ###
out.data.att <- data.frame(estimator=estimator,
estimate=estimate,
SE=SE,
#Z = estimate / SE,
Eff.n=Eff.n,
Eff.n.ratio=Eff.n.ratio,
DFBETA.min=DFBETA.min,
DFBETA.min.who=DFBETA.min.who,
DFBETA.max=DFBETA.max,
DFBETA.max.who=DFBETA.max.who,
extrap.control=extrap.control,
extrap.treat=extrap.treat)
print(out.data.att)
#library(xtable)
#xtable(out.data.att, digits=c(0, 0, rep(3, 5), 0, 3, 0, rep(3, 2)))
### Figure 1: DFBETA ###
DFBETA.ATT <- cbind(mydata.reg.att$DFBETA.out,
mydata.reg_cons.att$DFBETA.out,
mydata.sipw.logit.att$DFBETA.out,
mydata.entropy.balancing.att$DFBETA.out,
mydata.genMatching.att$DFBETA.out,
mydata.CEM.att$DFBETA.out)
colnames(DFBETA.ATT) <- c("Regression",
"Regression\n(Constant)",
"PScores",
"Ebal",
"GenMatch",
"CEM")
dfbeta.ATT.max <- apply(DFBETA.ATT, 2, max)
dfbeta.ATT.min <- apply(DFBETA.ATT, 2, min)
DFBETA.ATT <- cbind(mydata.reg.att$DFBETA.out,
mydata.reg_cons.att$DFBETA.out,
mydata.sipw.logit.att$DFBETA.out,
mydata.entropy.balancing.att$DFBETA.out,
#mydata.genMatching.att$DFBETA.out,
mydata.CEM.att$DFBETA.out)
colnames(DFBETA.ATT) <- c("Regression",
"Regression\n(Constant)",
"PScores",
"Ebal",
"GenMatch",
"CEM")
dfbeta.ATT.max <- apply(DFBETA.ATT, 2, max)
dfbeta.ATT.min <- apply(DFBETA.ATT, 2, min)
#pdf("mpsforsale_DFBETA_ATT.pdf", width = 8, height = 8,
#    family = "Times")
plot_DFBETA(DFBETA.ATT, main = NULL)
#dev.off()
DFBETA.ATT <- cbind(mydata.reg.att$DFBETA.out,
mydata.reg_cons.att$DFBETA.out,
mydata.sipw.logit.att$DFBETA.out,
mydata.entropy.balancing.att$DFBETA.out,
#mydata.genMatching.att$DFBETA.out,
mydata.CEM.att$DFBETA.out)
colnames(DFBETA.ATT) <- c("Regression",
"Regression\n(Constant)",
"PScores",
"Ebal",
#"GenMatch",
"CEM")
dfbeta.ATT.max <- apply(DFBETA.ATT, 2, max)
dfbeta.ATT.min <- apply(DFBETA.ATT, 2, min)
#pdf("mpsforsale_DFBETA_ATT.pdf", width = 8, height = 8,
#    family = "Times")
plot_DFBETA(DFBETA.ATT, main = NULL)
#dev.off()
setwd("~/Dropbox/Missing dead/replication/")
library(haven)
library(scales)
library(estimatr)
library(data.table)
library(modelsummary) ## modelsummary(): for lm_robust object
library(extrafont) ## change fonts in plot
### Function
collapseEvent <- function(vector, unit, time,
log = TRUE,
sum = FALSE,
count = FALSE){
# Args:
#   vector:  n x 1 vector to be collapsed by unit-time
#   unit:    n x 1 vector of event unit
#   time:    n x 1 vector of event time
if (sum == TRUE){
out <- tapply(vector, list(unit, time), sum)
if (log == TRUE){
out <- log(out)
}
out <- cbind(rownames(out), out)
out <- as.data.table(out)
names(out)[1] <- "unit"
out <- melt(out, id = 1, na.rm = FALSE,
variable.name = "time",
value.name = "death")
out$unit <- as.numeric(out$unit)
out$death <- as.numeric(out$death)
out$prov_code <- as.numeric(substr(out$unit, 1, 2))
out$time <- as.numeric(as.character(out$time))
}
if (count == TRUE){
out <- tapply(vector, list(unit, time), length)
if (log == TRUE){
out <- log(out)
}
out <- cbind(rownames(out), out)
out <- as.data.table(out)
names(out)[1] <- "unit"
out <- melt(out, id = 1, na.rm = FALSE,
variable.name = "time",
value.name = "count")
out$unit <- as.numeric(out$pref_code)
out$count <- as.numeric(out$count)
out$prov_code <- as.numeric(substr(out$unit, 1, 2))
out$time <- as.numeric(as.character(out$time))
}
return(out)
}
dataCollapse <- function(vector, year.v, treat.ind){
# Args:
#   vector:     n x 1 vector to be collapsed by province-treat
#   year.v:     n x 1 vector
#   treat.ind:  n x 1 vector of treatment indicator
out <- tapply(vector,
list(year.v, treat.ind),
function(x) mean(x, na.rm = T))
return(out)
}
paraPlot <- function(collapMat,
matrix.x.lab = NULL, matrix.title = NULL,
l.mar = 3.5,
hline = NULL,
x.axis.vec = NULL,
y.lab = TRUE, y.lab.names = NULL,
y.min = NULL, y.max = NULL,
legend = TRUE, leg.pos = NULL){
# Args:
#   collapMat:      n x 2 matrix (column 1: control; column 2: treat)
#   matrix.x.lab:   a string of figure x axis label
#   matrix.title:   a string of figure title
#   hline:          a scalar indicate the position of vertical line
#   x.axis.vec:     a length n string vector that shows x axis
#   y.lab:          logical parameter to turn off y label
#   y.lab.names:    a string of y axis lable
#   y.min:          a scalar indicates the minimum value on y axis
#   y.max:          a scalar indicates the maximum value on y axis
#   legend:         logical parameter to turn off legend
#   leg.pos:        a string indicates legend position
pch.cex <- 1.6
ctl.v <- collapMat[, 1]
tr.v <- collapMat[, 2]
#y.max <- max(x)
#y.min <- min(x)
x.len <- nrow(collapMat)
par(mar= c(3.2, l.mar, 1, 1))
plot(NA, xlim = c(0.5, x.len),
ylim = c(y.min, y.max),
type = "n",
xlab = matrix.x.lab,
main = matrix.title,
ylab = "", axes = F, cex.lab = 1.3)
grid(nx = NULL, ny = NULL,
lty = 2,      # Grid line type
col = "gray", # Grid line color
lwd = 0.6)
abline(v = hline, col = alpha("orangered", 0.8), lwd = 1.5)
lines(ctl.v, lty = 2, lwd = 1.5)
points(ctl.v, cex = pch.cex, pch = 1, col = "black")
lines(tr.v, lty = 1, lwd = 1.5)
points(tr.v, cex = pch.cex, pch = 19, col = "black", bg = "black")
axis(1, at = seq(1, x.len, 1), label = x.axis.vec,
cex.axis = 1, las = 1)
if (y.lab == TRUE){
axis(2, at = seq(ceiling(y.min), floor(y.max), 1),
label = seq(ceiling(y.min), floor(y.max), 1),
cex.axis = 1, las = 1)
title(ylab = y.lab.names, line = 2, cex.lab = 1)
}
if (legend == TRUE){
legend(leg.pos, title = NULL,
legend = c("Treated", "Control"),
col = "black",
lty = c(1, 2), cex = 0.8,
pch = c(19, 1),
lwd = c(1, 1))
}
}
mydata <- read_dta("190105_data cleaning.dta")
mydata <- as.data.frame(mydata)
mydata <- subset(mydata, year > 1999 & year < 2006)
mydata.traf <- mydata[mydata$category_manu == 5, ]
mydata.othr <- mydata[mydata$category_manu != 5, ]
treat_group_idx <- c("52", "36", "37", "53") ## by prov_code NOT province_CN
## same content, easier to subset data
## Prefecture-Year data
deathsum_pfy_lg <- collapseEvent(mydata$death, mydata$pref_code, mydata$year,
sum = TRUE)
deathsum_pfy_lg$treat <- ifelse(deathsum_pfy_lg$prov_code %in% treat_group_idx,
1, 0)
deathsum_pfy_lg$time_ind <- ifelse(deathsum_pfy_lg$time >= 2004, 1, 0)
deathsum_pfy_lg$did <- deathsum_pfy_lg$treat * deathsum_pfy_lg$time_ind
trafsum_pfy_lg <- collapseEvent(mydata.traf$death, mydata.traf$pref_code,
mydata.traf$year, sum = TRUE)
trafsum_pfy_lg$treat <- ifelse(trafsum_pfy_lg$prov_code %in% treat_group_idx,
1, 0)
trafsum_pfy_lg$time_ind <- ifelse(trafsum_pfy_lg$time >= 2004, 1, 0)
trafsum_pfy_lg$did <- trafsum_pfy_lg$treat * trafsum_pfy_lg$time_ind
othrsum_pfy_lg <- collapseEvent(mydata.othr$death, mydata.othr$pref_code,
mydata.othr$year, sum = TRUE)
othrsum_pfy_lg$treat <- ifelse(othrsum_pfy_lg$prov_code %in% treat_group_idx,
1, 0)
othrsum_pfy_lg$time_ind <- ifelse(othrsum_pfy_lg$time >= 2004, 1, 0)
othrsum_pfy_lg$did <- othrsum_pfy_lg$treat * othrsum_pfy_lg$time_ind
pref.year <- list(all = deathsum_pfy_lg,
traffic = trafsum_pfy_lg,
others = othrsum_pfy_lg)
## Province-month data
deathsum_pm_lg <- collapseEvent(mydata$death, mydata$prov_code, mydata$month,
sum = TRUE)
deathsum_pm_lg$treat <- ifelse(deathsum_pm_lg$prov_code %in% treat_group_idx,
1, 0)
deathsum_pm_lg$time_ind <- ifelse(deathsum_pm_lg$time >= 522, 1, 0)
deathsum_pm_lg$did <- deathsum_pm_lg$treat * deathsum_pm_lg$time_ind
trafsum_pm_lg <- collapseEvent(mydata.traf$death, mydata.traf$prov_code,
mydata.traf$month, sum = TRUE)
trafsum_pm_lg$treat <- ifelse(trafsum_pm_lg$prov_code %in% treat_group_idx,1, 0)
trafsum_pm_lg$time_ind <- ifelse(trafsum_pm_lg$time >= 522, 1, 0)
trafsum_pm_lg$did <- trafsum_pm_lg$treat * trafsum_pm_lg$time_ind
othrsum_pm_lg <- collapseEvent(mydata.othr$death, mydata.othr$prov_code,
mydata.othr$month, sum = TRUE)
othrsum_pm_lg$treat <- ifelse(othrsum_pm_lg$prov_code %in% treat_group_idx,1, 0)
othrsum_pm_lg$time_ind <- ifelse(othrsum_pm_lg$time >= 522, 1, 0)
othrsum_pm_lg$did <- othrsum_pm_lg$treat * othrsum_pm_lg$time_ind
prov.month <- list(all = deathsum_pm_lg,
traffic = trafsum_pm_lg,
others = othrsum_pm_lg)
t.var <- c("did", "time_ind", "treat")
formula <- as.formula(paste("death ~", paste(t.var, collapse = " + "),
"+ factor(time) + factor(prov_code)"))
## Prefecture-Year
estimate.pfy <- NULL
SE.pfy <- NULL
mod.out.pfy <- list()
for(i in 1:length(pref.year)){
mod <- lm_robust(formula, cluster = prov_code, data = pref.year[[i]])
estimate.pfy <- c(estimate.pfy, coefficients(mod)['did'])
SE.pfy <- c(SE.pfy, summary(mod)$coefficients['did', 2])
mod.out.pfy <- append(mod.out.pfy, list(mod))
}
## Province-month
estimate.pm <- NULL
SE.pm <- NULL
mod.out.pm <- list()
for(i in 1:length(prov.month)){
mod <- lm_robust(formula, cluster = prov_code, data = prov.month[[i]])
estimate.pm <- c(estimate.pm, coefficients(mod)['did'])
SE.pm <- c(SE.pm, summary(mod)$coefficients['did', 2])
mod.out.pm <- append(mod.out.pm, list(mod))
}
pdf("f9_prefYear_did.pdf", family = "Times New Roman",
width = 3.4, height = 3)
y.ind <- c(1.2, 1.4, 1.6)
y.ind <- rev(y.ind)
par(mar= c(3,6,0,1), mgp=c(2, 0.5, 0))
plot(NA, xlim = c(-0.7, 0.7),
ylim = c(0.8, 2),
type = "n",
xlab = "DD Coefficient",
ylab = "", axes = F, cex.lab = 1.1)
segments(x0 = estimate.pfy - qnorm(0.95) * SE.pfy,
y0 = y.ind,
x1 = estimate.pfy + qnorm(0.95) * SE.pfy,
y1 = y.ind,
col = alpha("black", 0.45), lwd = 6)
segments(x0 = estimate.pfy - qnorm(0.975) * SE.pfy,
y0 = y.ind,
x1 = estimate.pfy + qnorm(0.975) * SE.pfy,
y1 = y.ind,
col = "black", lwd = 2)
points(estimate.pfy, y.ind, cex = 2, pch = 19,
col = "black", bg = "black")
axis(1, at = seq(-0.5, 0.5, 0.5), cex.axis = 1, las = 1)
axis(2, at = y.ind,
labels = c("Aggregate", "Road Traffic", "Others"),
las = 2, cex.axis = 1, tick = F)
abline(v = 0, col = "gray", lty = 2, lwd = 0.8)
legend("bottomright", title = "CI",
legend = c("95%", "90%"),
col = c("black", alpha("black", 0.45)),
lty = c(1, 1), cex = 0.8,
lwd = c(1, 4))
dev.off()
pdf("f9_prefYear_did.pdf", family = "Times New Roman",
width = 3.4, height = 3)
y.ind <- c(1.2, 1.4, 1.6)
y.ind <- rev(y.ind)
par(mar= c(3,5,0,1), mgp=c(2, 0.5, 0))
plot(NA, xlim = c(-0.7, 0.7),
ylim = c(0.8, 2),
type = "n",
xlab = "DD Coefficient",
ylab = "", axes = F, cex.lab = 1.1)
segments(x0 = estimate.pfy - qnorm(0.95) * SE.pfy,
y0 = y.ind,
x1 = estimate.pfy + qnorm(0.95) * SE.pfy,
y1 = y.ind,
col = alpha("black", 0.45), lwd = 6)
segments(x0 = estimate.pfy - qnorm(0.975) * SE.pfy,
y0 = y.ind,
x1 = estimate.pfy + qnorm(0.975) * SE.pfy,
y1 = y.ind,
col = "black", lwd = 2)
points(estimate.pfy, y.ind, cex = 2, pch = 19,
col = "black", bg = "black")
axis(1, at = seq(-0.5, 0.5, 0.5), cex.axis = 1, las = 1)
axis(2, at = y.ind,
labels = c("Aggregate", "Road Traffic", "Others"),
las = 2, cex.axis = 1, tick = F)
abline(v = 0, col = "gray", lty = 2, lwd = 0.8)
legend("bottomright", title = "CI",
legend = c("95%", "90%"),
col = c("black", alpha("black", 0.45)),
lty = c(1, 1), cex = 0.8,
lwd = c(1, 4))
dev.off()
pdf("f9_prefYear_did.pdf", family = "Times New Roman",
width = 3.4, height = 3)
y.ind <- c(1.2, 1.4, 1.6)
y.ind <- rev(y.ind)
par(mar= c(3,5,0,1), mgp=c(2, 0.5, 0))
plot(NA, xlim = c(-0.7, 0.7),
ylim = c(0.8, 1.8),
type = "n",
xlab = "DD Coefficient",
ylab = "", axes = F, cex.lab = 1.1)
segments(x0 = estimate.pfy - qnorm(0.95) * SE.pfy,
y0 = y.ind,
x1 = estimate.pfy + qnorm(0.95) * SE.pfy,
y1 = y.ind,
col = alpha("black", 0.45), lwd = 6)
segments(x0 = estimate.pfy - qnorm(0.975) * SE.pfy,
y0 = y.ind,
x1 = estimate.pfy + qnorm(0.975) * SE.pfy,
y1 = y.ind,
col = "black", lwd = 2)
points(estimate.pfy, y.ind, cex = 2, pch = 19,
col = "black", bg = "black")
axis(1, at = seq(-0.5, 0.5, 0.5), cex.axis = 1, las = 1)
axis(2, at = y.ind,
labels = c("Aggregate", "Road Traffic", "Others"),
las = 2, cex.axis = 1, tick = F)
abline(v = 0, col = "gray", lty = 2, lwd = 0.8)
legend("bottomright", title = "CI",
legend = c("95%", "90%"),
col = c("black", alpha("black", 0.45)),
lty = c(1, 1), cex = 0.8,
lwd = c(1, 4))
dev.off()
pdf("f9_prefYear_did.pdf", family = "Times New Roman",
width = 3.4, height = 3)
y.ind <- c(1.2, 1.4, 1.6)
y.ind <- rev(y.ind)
par(mar= c(3,5,0,1), mgp=c(2, 0.5, 0))
plot(NA, xlim = c(-0.7, 0.7),
ylim = c(1, 1.8),
type = "n",
xlab = "DD Coefficient",
ylab = "", axes = F, cex.lab = 1.1)
segments(x0 = estimate.pfy - qnorm(0.95) * SE.pfy,
y0 = y.ind,
x1 = estimate.pfy + qnorm(0.95) * SE.pfy,
y1 = y.ind,
col = alpha("black", 0.45), lwd = 6)
segments(x0 = estimate.pfy - qnorm(0.975) * SE.pfy,
y0 = y.ind,
x1 = estimate.pfy + qnorm(0.975) * SE.pfy,
y1 = y.ind,
col = "black", lwd = 2)
points(estimate.pfy, y.ind, cex = 2, pch = 19,
col = "black", bg = "black")
axis(1, at = seq(-0.5, 0.5, 0.5), cex.axis = 1, las = 1)
axis(2, at = y.ind,
labels = c("Aggregate", "Road Traffic", "Others"),
las = 2, cex.axis = 1, tick = F)
abline(v = 0, col = "gray", lty = 2, lwd = 0.8)
legend("bottomright", title = "CI",
legend = c("95%", "90%"),
col = c("black", alpha("black", 0.45)),
lty = c(1, 1), cex = 0.8,
lwd = c(1, 4))
dev.off()
setwd("~/Dropbox/Missing dead/replication/")
library(haven)
library(scales)
library(estimatr)
library(data.table)
library(modelsummary) ## modelsummary(): for lm_robust object
library(extrafont) ## change fonts in plot
library(MatchIt)
mydata <- read_dta("190105_main.dta", encoding = "UTF-8")
mydata <- as.data.frame(mydata)
cov.names <- c("log_pop2", "rural_pop_pct",
"log_light", "log_total_tax_rev_pc",
"log_coal_produ0", "log_mine_indwage",
"log_traf_indwage",
"sec_tty", "sec_age",
"sec_promotion", "gov_tty",
"gov_age", "gov_promotion")
mydata <- read_dta("190105_main.dta", encoding = "UTF-8")
mydata <- as.data.frame(mydata)
mydata <- mydata[mydata$year < 2004, ]
cov.names <- c("log_pop2", "rural_pop_pct",
"log_light", "log_total_tax_rev_pc",
"log_coal_produ0", "log_mine_indwage",
"log_traf_indwage",
"sec_tty", "sec_age",
"sec_promotion", "gov_tty",
"gov_age", "gov_promotion")
mydata <- mmydata[mydata$year < 2004, ]
cov.names <- c("log_pop2", "rural_pop_pct",
"log_light", "log_total_tax_rev_pc",
"log_coal_produ0", "log_mine_indwage",
"log_traf_indwage",
"sec_tty", "sec_age",
"sec_promotion", "gov_tty",
"gov_age", "gov_promotion")
as.formula(paste("treat_group ~", paste(cov.names, collapse = " + "))
)
formula <- as.formula(paste("treat_group~", paste(cov.names, collapse = " + ")))
formula
library(cobalt) ## bal.tab()
?bal.tab
covs <- mydata[, cov.names]
mydata <- read_dta("190105_main.dta", encoding = "UTF-8")
mydata <- as.data.frame(mydata)
mydata <- mydata[mydata$year < 2004, ]
cov.names <- c("log_pop2", "rural_pop_pct",
"log_light", "log_total_tax_rev_pc",
"log_coal_produ0", "log_mine_indwage",
"log_traf_indwage",
"sec_tty", "sec_age",
"sec_promotion", "gov_tty",
"gov_age", "gov_promotion")
covs <- mydata[, cov.names]
bal.tab(covs, treat = mydata$treat_group)
cov.bal <- bal.tab(covs, treat = mydata$treat_group)
cov.bal <- bal.tab(covs, treat = mydata$treat_group, s.d.denom = "ATT")
cov.bal <- bal.tab(covs, treat = mydata$treat_group, s.d.denom = "treated")
love.plot(cov.bal)
love.plot(cov.bal, binary = "std", thresholds = c(m = .1))
cov.bal <- bal.tab(covs, treat = mydata$treat_group, s.d.denom = "treated",
stats = c("c", "m"), un = TRUE,
thresholds = c(cor = .1), poly = 3)
cov.bal
cov.bal <- bal.tab(covs, treat = mydata$treat_group, s.d.denom = "treated",
stats = c("c", "m"), un = TRUE,
thresholds = c(cor = .1))
cov.bal
col_w_smd(covs, treat = mydata$treat_group)
cov.bal <- bal.tab(covs, treat = mydata$treat_group, s.d.denom = "treated")
cov.bal
col_w_smd(covs, treat = mydata$treat_group, s.d.denom = "treated")
cov.bal <- bal.tab(covs, treat = mydata$treat_group, s.d.denom = "treated")
cov.bal
love.plot(cov.bal, stats = c("m"), thresholds = 0.2)
love.plot(cov.bal, stats = c("m"), thresholds = 0.2, binary = "std")
cov.bal <- bal.tab(covs, treat = mydata$treat_group, s.d.denom = "treated",
binary = "std")
love.plot(cov.bal, stats = c("m"), thresholds = 0.2)
bal.tab(covs, treat = mydata$treat_group)
a <- bal.tab(covs, treat = mydata$treat_group)
a$Observations
?match
?Match()
